## About
# Here I analyze the profilometer data.
# I calculate the mean thickness and the bottom thickness.
# I only include specific intervals in the calculations,
# because those are the intervals that were leveled by the Vision 64 software.
# Since the substrates are not flat and leveling works with linear functions, not
# all of the samples could be leveled at once.

## !! Warning: only works properly when there are no other csv files in the folder (like the statistics output)

## Environment
# NOTE: the former `rm(list = ls())` call was removed on purpose. It only
# deletes objects from the global environment (attached packages and options
# stay as they are), so it does not give a truly clean state, and it silently
# destroys the workspace of anyone who sources this script. Restart the R
# session instead when a clean state is needed.

## Read data
# `pattern` is a regular expression, not a shell glob: "\\.csv$" selects only
# file names that end in ".csv" (the former "*.csv" also matched names that
# merely contain "csv" somewhere).
files <- list.files(
  path = "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH_PU_VTT_2021-01-15",
  pattern = "\\.csv$", full.names = TRUE, recursive = FALSE
)
resolution <- 0.833278 # scan resolution in um

## Preallocate the result containers
# One list per statistic, each with one (initially NULL) slot per input file.
n_files <- length(files)

# Sample identifier
sampleFull_lst <- vector(mode = "list", length = n_files)

# Whole-profile thickness: mean, standard deviation, standard error, count
thickness_lst <- vector(mode = "list", length = n_files)
std_lst <- vector(mode = "list", length = n_files)
sem_lst <- vector(mode = "list", length = n_files)
n_lst <- vector(mode = "list", length = n_files)

# Baseline (valleys): mean, standard deviation, standard error, count
baseline_lst <- vector(mode = "list", length = n_files)
baseline_sd_lst <- vector(mode = "list", length = n_files)
baseline_sem_lst <- vector(mode = "list", length = n_files)
nbaseline_lst <- vector(mode = "list", length = n_files)

# Peaks: mean, standard deviation, standard error, count
peaks_mean_lst <- vector(mode = "list", length = n_files)
peaks_sd_lst <- vector(mode = "list", length = n_files)
peaks_sem_lst <- vector(mode = "list", length = n_files)
npeaks_lst <- vector(mode = "list", length = n_files)

## Calculations
library(stringr) # package for regular expressions
#library(pracma) # to find local minima and maxima (alternative: findPeaks or findValleys in quantmod package)
library(quantmod) # findValleys/findPeaks
## Per-file statistics
# For every path in `files` (statistics files are skipped) this loop reads the
# profile, computes the mean thickness, the mean height of the large peaks and
# the mean baseline (valley) height - each with sd, sem and number of points -
# and stores the results in slot i of the preallocated result lists.

# Row window used for all statistics: rows round(5000/resolution) to
# round(13000/resolution). With `resolution` in um per point this presumably
# corresponds to lateral positions of about 5000-13000 um (an 8000 um window)
# - TODO confirm that row 1 sits at position 0.
# It does not depend on the file, so it is computed once outside the loop.
profile_rows <- round(5000 / resolution):round(13000 / resolution)

# seq_along() iterates zero times when no files were found; 1:length(files)
# would iterate over c(1, 0) and fail on the missing path.
for (i in seq_along(files)) {
  if (str_detect(files[i], "stats")) { # ignore file if it is a statistics file
    next
  }
  ## Extract sample name
  sampleNameFull <- str_extract(files[i], "[a-zA-Z0-9()\\-\\_]*\\.csv$") # regex which extracts the sample from the path name.

  ## Extract leveled sample area
  # NOTE: x1 and x2 are only used by the optional (commented-out) plot below;
  # the statistics use the fixed row window `profile_rows`.
  leveled_range <- str_extract(files[i], "[0-9]+\\-[0-9]+mm") # regex which extracts the leveled area from the path name.
  x1 <- as.numeric(str_extract(leveled_range, "^[0-9]+")) # start of string (first number)
  x2 <- as.numeric(str_extract(leveled_range, "[0-9]+(?=mm)")) # second number (the digits directly followed by "mm")

  ## read file
  profil <- read.csv(file = files[i], header = TRUE, sep = ",", dec = ".", skip = 22) # load file (the first 22 lines are skipped)

  ## calculations
  # Keep the first two columns of the row window and scale both by 0.0001
  # (converts the data to microns according to the original author; the raw
  # unit is not visible here - TODO confirm).
  profi_data_um <- profil[profile_rows, 1:2] * 0.0001

  ## Plot profile
  # profi_data_um_full <- profil[,1:2]*0.0001 # Check full profiles
  # plot(profi_data_um_full) # check full profiles
  # plot(profi_data_um_full, xlim = c(x1/10, x2/10)) # check full profiles
  # title(main = sampleNameFull) # check full profiles

  # Height column. `$` is kept on purpose: it allows partial matching of the
  # column name, which the original code may rely on.
  heights <- profi_data_um$Total.Profile...

  # Calculate mean thickness and its spread (NA values are ignored)
  thickness_mean <- mean(heights, na.rm = TRUE)
  thickness_std <- sd(heights, na.rm = TRUE)
  data_cleaned <- na.omit(heights)
  thickness_sem <- thickness_std / sqrt(length(data_cleaned))

  # quantmod::findPeaks()/findValleys() return the index of the first sample
  # AFTER the extremum, so shift by one to read the height at the actual
  # peak/valley.
  peaks_x <- findPeaks(heights, thresh = 0) - 1
  peaks <- heights[peaks_x]
  peaks2 <- peaks[peaks > 10] # 10 um as an arbitrary threshold for aggregates/unexfoliated flakes
  peaks_mean_lst[[i]] <- mean(peaks2)
  peaks_sd_lst[[i]] <- sd(peaks2)
  peaks_sem_lst[[i]] <- sd(peaks2) / sqrt(length(peaks2))

  lows_x <- findValleys(heights, thresh = 0) - 1
  lows <- heights[lows_x]
  # exclude negative data (likely corresponding to pinholes) and shoulder peaks
  # NOTE(review): if no peak exceeds 10 um, mean(peaks2) is NaN, the comparison
  # yields NA entries and the baseline mean/sd become NA - decide whether that
  # is the intended outcome.
  lows2 <- lows[lows > 0 & lows <= mean(peaks2)]
  baseline_lst[[i]] <- mean(lows2)
  baseline_sd_lst[[i]] <- sd(lows2)
  baseline_sem_lst[[i]] <- sd(lows2) / sqrt(length(lows2))

  ## store the remaining results in the lists, by position
  sampleFull_lst[[i]] <- sampleNameFull
  thickness_lst[[i]] <- thickness_mean
  std_lst[[i]] <- thickness_std
  sem_lst[[i]] <- thickness_sem
  n_lst[[i]] <- length(data_cleaned)
  npeaks_lst[[i]] <- length(peaks2)
  nbaseline_lst[[i]] <- length(lows2)
}

## Assemble the per-file results into one data frame
# unlist() flattens each result list into a plain vector; list slots that are
# still NULL are dropped by unlist(). The columns are then labelled in the
# same order in which they are passed to data.frame().
df <- setNames(
  data.frame(
    unlist(sampleFull_lst),
    unlist(thickness_lst),
    unlist(std_lst),
    unlist(sem_lst),
    unlist(n_lst),
    unlist(peaks_mean_lst),
    unlist(peaks_sd_lst),
    unlist(peaks_sem_lst),
    unlist(npeaks_lst),
    unlist(baseline_lst),
    unlist(baseline_sd_lst),
    unlist(baseline_sem_lst),
    unlist(nbaseline_lst)
  ),
  c(
    "Sample",
    "Thickness_mean", "Thickness_sd", "Thickness_sem", "Number_points",
    "Peaks_mean", "Peaks_sd", "Peaks_sem", "Number_peak_points",
    "Baseline_mean", "Baseline_sd", "Baseline_sem", "Number_baseline_points"
  )
)
# print(df)

## Export the statistics table
# Semicolon-separated text file with a decimal comma, a header row and no row
# names. The file name contains "stats", which is the marker the calculation
# loop uses to skip statistics files.
stats_file <- "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH_PU_VTT_2021-01-15/LH_PU_VTT_2021-01-15_stats_20210208.csv"
write.table(
  x = df,
  file = stats_file,
  sep = ";",
  dec = ",",
  row.names = FALSE,
  col.names = TRUE
)
